library(fpp)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1
## 
## 
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(TTR)
library(readr)

air <- read_csv("~/Downloads/airtraffic.csv")
## Rows: 249 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (5): Year, Month, Dom_LF, Int_LF, LF
## num (12): Dom_Pax, Int_Pax, Pax, Dom_Flt, Int_Flt, Flt, Dom_RPM, Int_RPM, RP...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Air_Raw <- air$Flt
Air_ts <- ts(Air_Raw, frequency = 12, start = c(2003,1))
plot(Air_ts)

Air_ts
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 2003 842827 741610 856120 821265 844662 856576 894576 894497 835821 872580
## 2004 845530 817440 895125 878354 895946 901172 940939 951311 875835 919950
## 2005 872141 820306 935395 904552 941186 933236 961902 964102 876747 892282
## 2006 851600 776504 894057 862927 892727 891734 928415 941286 862487 891323
## 2007 873558 787368 902740 879700 914511 904960 941191 949167 869349 905715
## 2008 864080 813081 891960 870504 891901 888697 921981 898759 788085 819535
## 2009 776904 721118 817127 795925 812825 826477 865158 851337 767489 784846
## 2010 761435 682832 809631 788620 809605 821753 858562 854246 779011 799678
## 2011 749578 687145 826594 788736 811458 826578 860116 839522 768040 786043
## 2012 745717 711905 801999 775919 795525 807244 833647 827637 740518 757502
## 2013 730667 670514 783938 762993 792128 792262 828902 822246 745390 772968
## 2014 693436 640660 772440 747917 768849 778423 815881 800478 727898 757294
## 2015 704627 632620 756818 740629 761713 771398 811289 799989 720759 751372
## 2016 707237 678081 771996 746635 775065 788704 809770 806894 735933 751607
## 2017 716411 655855 770634 745517 779642 795324 820949 810895 712042 756360
## 2018 723111 663958 773066 767348 797342 809686 841499 833431 751454 789191
## 2019 739803 673087 806233 781801 815270 820663 851326 848650 769351 805988
## 2020 770528 724758 667244 222280 257446 280256 427918 459910 397791 433913
## 2021 449262 396590 541632 555249 610401 666566 717100 712664 654042 677379
## 2022 628054 586629 691979 679597 709698 708118 739118 726120 679388 697047
## 2023 672803 628345 726912 706238 739255 736572 764677 768619 713549       
##         Nov    Dec
## 2003 819659 855970
## 2004 878360 899701
## 2005 856725 867307
## 2006 854346 874036
## 2007 868871 874306
## 2008 767933 785484
## 2009 753473 769641
## 2010 765076 768595
## 2011 743438 769169
## 2012 731077 738651
## 2013 719062 731424
## 2014 713691 737455
## 2015 714668 730730
## 2016 719350 734434
## 2017 719555 738657
## 2018 742649 764065
## 2019 758402 793144
## 2020 450701 467620
## 2021 668512 665686
## 2022 664820 663948
## 2023
#Shows the number of flights monthly from 2003 to 2023. Sharp drop in 2020 due to COVID.
air_win <- window(Air_ts, start = c(2021,6))
#Will use a window starting from June 2021, since that is where the air traffic returned to normal from COVID.
plot(air_win)

Acf(air_win)

#Acf shows no strong trend in the data.

summary(air_win)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  586629  666346  694513  692623  719355  768619
boxplot(air_win)

#Average number of flights in the US monthly is 692623. The maximum amount in any month is 768619.

decompose <- decompose(air_win)
plot(decompose)

decompose
## $x
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 2021                                    666566 717100 712664 654042 677379
## 2022 628054 586629 691979 679597 709698 708118 739118 726120 679388 697047
## 2023 672803 628345 726912 706238 739255 736572 764677 768619 713549       
##         Nov    Dec
## 2021 668512 665686
## 2022 664820 663948
## 2023              
## 
## $seasonal
##              Jan         Feb         Mar         Apr         May         Jun
## 2021                                                              27610.0747
## 2022 -36456.1962 -81554.6128  17998.4288   -471.5087  28963.8247  27610.0747
## 2023 -36456.1962 -81554.6128  17998.4288   -471.5087  28963.8247  27610.0747
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2021  56817.9497  40217.2413  -9708.4670   5384.9497 -29183.6337 -19618.0503
## 2022  56817.9497  40217.2413  -9708.4670   5384.9497 -29183.6337 -19618.0503
## 2023  56817.9497  40217.2413  -9708.4670                                    
## 
## $trend
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2021                                                    NA       NA       NA
## 2022 675872.2 677350.3 678967.1 680842.7 681508.3 681282.1 683074.2 686676.9
## 2023 699445.5 702281.2 705475.4       NA       NA       NA       NA       NA
##           Sep      Oct      Nov      Dec
## 2021       NA       NA       NA 673223.5
## 2022 689870.6 692436.2 694777.8 697194.9
## 2023       NA                           
## 
## $random
##             Jan        Feb        Mar        Apr        May        Jun
## 2021                                                                NA
## 2022 -11362.054  -9166.720  -4986.512   -774.158   -774.158   -774.158
## 2023   9813.738   7618.405   3438.196         NA         NA         NA
##             Jul        Aug        Sep        Oct        Nov        Dec
## 2021         NA         NA         NA         NA         NA  12080.550
## 2022   -774.158   -774.158   -774.158   -774.158   -774.158 -13628.866
## 2023         NA         NA         NA                                 
## 
## $figure
##  [1]  27610.0747  56817.9497  40217.2413  -9708.4670   5384.9497 -29183.6337
##  [7] -19618.0503 -36456.1962 -81554.6128  17998.4288   -471.5087  28963.8247
## 
## $type
## [1] "additive"
## 
## attr(,"class")
## [1] "decomposed.ts"
#Time series is additive.
#The number of flights is lowest in November, December, January, and February. It is highest in May, June, July, and August. Therefore, this data is seasonal. The number of flights is higher in the summer months because the weather is warmer and people tend to take off work and travel. The total flights are lower in the winter months since the weather is cold and there are also a lot of holidays. People tend to stay home with their family during the holidays.

plot(air_win)
seasonadj_air <- seasadj(decompose)
lines(seasonadj_air, col = "blue")

#There are some large fluctuations between the time series and the seasonally adjusted plot. This means the seasonal fluctuations are very large in the data.

#Naive Method
naive_forecast <- naive(air_win, 12)
plot(naive_forecast)

res_air <- naive_forecast$residuals
plot(res_air)

#Naive forecast predicts a constant number for the next year.
#Plot of residuals shows large fluctuating errors, but no trend over time.
hist(res_air)

#Most of the residuals occur between the values of -50,000 and 50,000. There a lot of large errors for the naive model.
plot(as.numeric(naive_forecast$fitted), as.numeric(res_air))

plot(as.numeric(naive_forecast$x), as.numeric(res_air))

#Both the fitted and actual forecast do not have any correlation with the residuals.
Acf(res_air)

#No trend in reisudals as there are only two lines above the line of significance at lag 6 and 12.
accuracy(naive_forecast)
##                    ME     RMSE      MAE        MPE     MAPE    MASE      ACF1
## Training set 1740.111 40245.58 30146.33 0.07588592 4.384556 1.10695 -0.343988
#MAPE is 4.3846 which is high.
plot(naive_forecast)

naive_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Oct 2023         713549 661972.2 765125.8 634669.1 792428.9
## Nov 2023         713549 640608.4 786489.6 601996.0 825102.0
## Dec 2023         713549 624215.4 802882.6 576925.0 850173.0
## Jan 2024         713549 610395.4 816702.6 555789.2 871308.8
## Feb 2024         713549 598219.8 828878.2 537168.2 889929.8
## Mar 2024         713549 587212.2 839885.8 520333.5 906764.5
## Apr 2024         713549 577089.6 850008.4 504852.4 922245.6
## May 2024         713549 567667.8 859430.2 490443.0 936655.0
## Jun 2024         713549 558818.6 868279.4 476909.3 950188.7
## Jul 2024         713549 550448.9 876649.1 464108.9 962989.1
## Aug 2024         713549 542488.1 884609.9 451934.0 975164.0
## Sep 2024         713549 534881.8 892216.2 440301.0 986797.0
#Overall, not the best. Predicts 713549 flights for every month of the next year.

#Simple Smoothing
ets_air <- ets(air_win)
ets_forecast <- forecast(ets_air, h=12)
plot(ets_forecast)

ets_air
## ETS(A,N,N) 
## 
## Call:
## ets(y = air_win)
## 
##   Smoothing parameters:
##     alpha = 0.5897 
## 
##   Initial states:
##     l = 680302.5868 
## 
##   sigma:  38093.85
## 
##      AIC     AICc      BIC 
## 687.9040 688.9040 691.9006
#Forecasts a straight line for the next 12 months.
res_ets <- ets_forecast$residuals
plot(res_ets)

#Residuals do not show trend with time.
hist(res_ets)

#Most errors are around 0, but a lot are also around -50,000.
plot(as.numeric(ets_forecast$fitted), as.numeric(res_ets))

plot(as.numeric(ets_forecast$x), as.numeric(res_ets))

#Residuals points are scattered in both plots. No visible trend between the fitted/actuals plots and the residuals.
Acf(res_ets)

#Only significant lines at lag 6 and 12. No trend in residuals.
accuracy(ets_forecast)
##                    ME     RMSE      MAE     MPE     MAPE     MASE        ACF1
## Training set 3201.761 36708.16 29211.68 0.24674 4.256593 1.072631 -0.02053556
plot(ets_forecast)

ets_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Oct 2023         733171 684351.8 781990.3 658508.4 807833.6
## Nov 2023         733171 676494.9 789847.1 646492.4 819849.6
## Dec 2023         733171 669601.9 796740.2 635950.4 830391.7
## Jan 2024         733171 663386.4 802955.7 626444.6 839897.4
## Feb 2024         733171 657680.9 808661.1 617718.9 848623.2
## Mar 2024         733171 652377.4 813964.7 609607.8 856734.3
## Apr 2024         733171 647401.1 818940.9 601997.3 864344.8
## May 2024         733171 642698.2 823643.9 594804.7 871537.3
## Jun 2024         733171 638227.9 828114.2 587968.0 878374.0
## Jul 2024         733171 633958.8 832383.2 581439.1 884903.0
## Aug 2024         733171 629866.1 836476.0 575179.7 891162.4
## Sep 2024         733171 625929.3 840412.7 569159.0 897183.0
#HoltWinters
HW_air <- HoltWinters(air_win)
HW_forecast <- forecast(HW_air, h=12)
plot(HW_forecast)

HW_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Oct 2023       733816.8 718857.0 748776.7 710937.7 756696.0
## Nov 2023       702511.5 687355.8 717667.3 679332.9 725690.2
## Dec 2023       728195.1 712758.3 743632.0 704586.5 751803.7
## Jan 2024       691177.7 675362.9 706992.4 666991.0 715364.3
## Feb 2024       651537.9 635239.4 667836.3 626611.6 676464.2
## Mar 2024       758534.4 741640.7 775428.2 732697.6 784371.2
## Apr 2024       747540.1 729936.3 765144.0 720617.3 774462.9
## May 2024       780238.7 761809.5 798668.0 752053.6 808423.9
## Jun 2024       782148.3 762779.9 801516.6 752526.9 811769.6
## Jul 2024       814619.4 794201.5 835037.4 783392.9 845846.0
## Aug 2024       801282.0 779708.4 822855.6 768288.1 834276.0
## Sep 2024       754619.6 731789.2 777450.0 719703.5 789535.7
res_HW <- HW_forecast$residuals
plot(res_HW)

#Some large errors around 2023.
hist(res_HW)

#Most errors are concentrated between -10,000 and 10,000.
plot(as.numeric(HW_forecast$fitted), as.numeric(res_HW))

plot(as.numeric(HW_forecast$x), as.numeric(res_HW))

#No correlation between the residuals and fitted/actual forecasts. Generally concentrated around 0 except for a few outliers.
Acf(res_HW)

#No trend in residuals.
accuracy(HW_forecast)
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 2687.366 11617.68 9200.157 0.4072509 1.329879 0.3378227
##                     ACF1
## Training set 0.008274902
#Very low MAPE at 1.33. RMSE is also 11617.68, which is much lower than the other forecasts so far.
plot(HW_forecast)

HW_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Oct 2023       733816.8 718857.0 748776.7 710937.7 756696.0
## Nov 2023       702511.5 687355.8 717667.3 679332.9 725690.2
## Dec 2023       728195.1 712758.3 743632.0 704586.5 751803.7
## Jan 2024       691177.7 675362.9 706992.4 666991.0 715364.3
## Feb 2024       651537.9 635239.4 667836.3 626611.6 676464.2
## Mar 2024       758534.4 741640.7 775428.2 732697.6 784371.2
## Apr 2024       747540.1 729936.3 765144.0 720617.3 774462.9
## May 2024       780238.7 761809.5 798668.0 752053.6 808423.9
## Jun 2024       782148.3 762779.9 801516.6 752526.9 811769.6
## Jul 2024       814619.4 794201.5 835037.4 783392.9 845846.0
## Aug 2024       801282.0 779708.4 822855.6 768288.1 834276.0
## Sep 2024       754619.6 731789.2 777450.0 719703.5 789535.7
#Forecast follows the seasonal trends of air traffic in the US. Definitely the best so far.

#ARIMA
Pacf(air_win)

#Must difference because Pacf has one significiant point at lag 1 and immediately drops off.
ndiffs(air_win)
## [1] 1
#There is 1 difference needed to make it stationary.
tsdisplay(air_win)

air_windiff1 <- diff(air_win, differences=1)
plot(air_windiff1)

tsdisplay(air_windiff1)

auto_fit <- auto.arima(air_win, trace=TRUE, stepwise = FALSE)
## 
##  ARIMA(0,0,0)(0,1,0)[12]                    : 377.6012
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 355.8378
##  ARIMA(0,0,1)(0,1,0)[12]                    : 367.228
##  ARIMA(0,0,1)(0,1,0)[12] with drift         : 354.3083
##  ARIMA(0,0,2)(0,1,0)[12]                    : 365.2007
##  ARIMA(0,0,2)(0,1,0)[12] with drift         : 357.7845
##  ARIMA(0,0,3)(0,1,0)[12]                    : 368.626
##  ARIMA(0,0,3)(0,1,0)[12] with drift         : 360.7292
##  ARIMA(0,0,4)(0,1,0)[12]                    : Inf
##  ARIMA(0,0,4)(0,1,0)[12] with drift         : Inf
##  ARIMA(0,0,5)(0,1,0)[12]                    : Inf
##  ARIMA(0,0,5)(0,1,0)[12] with drift         : Inf
##  ARIMA(1,0,0)(0,1,0)[12]                    : 360.7631
##  ARIMA(1,0,0)(0,1,0)[12] with drift         : 356.8136
##  ARIMA(1,0,1)(0,1,0)[12]                    : 363.482
##  ARIMA(1,0,1)(0,1,0)[12] with drift         : 357.8571
##  ARIMA(1,0,2)(0,1,0)[12]                    : Inf
##  ARIMA(1,0,2)(0,1,0)[12] with drift         : Inf
##  ARIMA(1,0,3)(0,1,0)[12]                    : Inf
##  ARIMA(1,0,3)(0,1,0)[12] with drift         : Inf
##  ARIMA(1,0,4)(0,1,0)[12]                    : Inf
##  ARIMA(1,0,4)(0,1,0)[12] with drift         : Inf
##  ARIMA(2,0,0)(0,1,0)[12]                    : 363.8195
##  ARIMA(2,0,0)(0,1,0)[12] with drift         : 357.7598
##  ARIMA(2,0,1)(0,1,0)[12]                    : Inf
##  ARIMA(2,0,1)(0,1,0)[12] with drift         : 361.5047
##  ARIMA(2,0,2)(0,1,0)[12]                    : Inf
##  ARIMA(2,0,2)(0,1,0)[12] with drift         : Inf
##  ARIMA(2,0,3)(0,1,0)[12]                    : Inf
##  ARIMA(2,0,3)(0,1,0)[12] with drift         : Inf
##  ARIMA(3,0,0)(0,1,0)[12]                    : 363.2131
##  ARIMA(3,0,0)(0,1,0)[12] with drift         : 361.4417
##  ARIMA(3,0,1)(0,1,0)[12]                    : 367.4622
##  ARIMA(3,0,1)(0,1,0)[12] with drift         : 366.7085
##  ARIMA(3,0,2)(0,1,0)[12]                    : Inf
##  ARIMA(3,0,2)(0,1,0)[12] with drift         : Inf
##  ARIMA(4,0,0)(0,1,0)[12]                    : 367.4735
##  ARIMA(4,0,0)(0,1,0)[12] with drift         : 366.5294
##  ARIMA(4,0,1)(0,1,0)[12]                    : 372.7396
##  ARIMA(4,0,1)(0,1,0)[12] with drift         : Inf
##  ARIMA(5,0,0)(0,1,0)[12]                    : Inf
##  ARIMA(5,0,0)(0,1,0)[12] with drift         : 371.9212
## 
## 
## 
##  Best model: ARIMA(0,0,1)(0,1,0)[12] with drift
Acf(air_windiff1)

Pacf(air_windiff1)

#Best ARIMA model is ARIMA(0,0,1)(0,1,0)[12] with drift. Drift indicates a slight increasing trend of total flights. The model says the time series is seasonal and there is one seasonal difference being used.
auto_fit
## Series: air_win 
## ARIMA(0,0,1)(0,1,0)[12] with drift 
## 
## Coefficients:
##          ma1      drift
##       0.6440  2244.9556
## s.e.  0.2134   398.8647
## 
## sigma^2 = 162913382:  log likelihood = -173.15
## AIC=352.31   AICc=354.31   BIC=354.63
attributes(auto_fit)
## $names
##  [1] "coef"      "sigma2"    "var.coef"  "mask"      "loglik"    "aic"      
##  [7] "arma"      "residuals" "call"      "series"    "code"      "n.cond"   
## [13] "nobs"      "model"     "xreg"      "bic"       "aicc"      "x"        
## [19] "fitted"   
## 
## $class
## [1] "forecast_ARIMA" "ARIMA"          "Arima"
plot(forecast(auto_fit,h=5,level=c(80)))

#This model has the lowest BIC of 354.63.
#Shows there is an 80% chance the actual value lies in the forecast range.
res_arima <- auto_fit$residuals
plot(res_arima)

Acf(res_arima)

hist(res_arima)

plot(as.numeric(auto_fit$fitted), as.numeric(res_arima))

plot(as.numeric(auto_fit$x), as.numeric(res_arima))

#Most errors are between 0 and 10,000 which are smaller errors for this data.
#No trend in residuals as shown in ACF.
#Most residuals are concentrated around 0 for both actual and fitted forecasts, with the exception of a few outliers.
accuracy(auto_fit)
##                    ME     RMSE      MAE         MPE      MAPE      MASE
## Training set 94.89433 9025.336 5640.899 0.003476053 0.8080238 0.2071295
##                     ACF1
## Training set -0.04472245
arima_forecast <- forecast(auto_fit, h=12)
plot(arima_forecast)

arima_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Oct 2023       722290.8 705933.4 738648.2 697274.3 747307.3
## Nov 2023       691759.5 672303.9 711215.0 662004.7 721514.2
## Dec 2023       690887.5 671431.9 710343.0 661132.7 720642.2
## Jan 2024       699742.5 680286.9 719198.0 669987.7 729497.2
## Feb 2024       655284.5 635828.9 674740.0 625529.7 685039.2
## Mar 2024       753851.5 734395.9 773307.0 724096.7 783606.2
## Apr 2024       733177.5 713721.9 752633.0 703422.7 762932.2
## May 2024       766194.5 746738.9 785650.0 736439.7 795949.2
## Jun 2024       763511.5 744055.9 782967.0 733756.7 793266.2
## Jul 2024       791616.5 772160.9 811072.0 761861.7 821371.2
## Aug 2024       795558.5 776102.9 815014.0 765803.7 825313.2
## Sep 2024       740488.5 721032.9 759944.0 710733.7 770243.2
#Forecast follows the seasonal trends of the past while expecting the total flights to slightly increase with time (drift).
#MAPE is 0.8080238, which is the lowest out of all the forecasts. RMSE is 9025.336 which is also the lowest of all the forecasts.
#ARIMA looks like the best forecast.

#Conclusion
#The ARIMA(0,0,1)(0,1,0)[12] with drift model is the best forecast to use to predict the total flights in the US over the next year. The ARIMA follows the seasonal trend of an increase in flights in the summer months, and a sharp decrease in the winter months. It accounts for an expected increase in total flights, due to more planes being built and more people wanting to fly places over time. 
#These seasonal fluctuations occur because people tend to go on vacations and travel in the summer when the weather is warm. Kids are out of school in the summer so it gives families an opportunity to travel places. In the winter, the weather is cold and most people are working or in school. Also, there are many holidays in the fall/winter, so people tend to stay home and be with their family.
#The ARIMA model had the lowest MAPE at 0.8080238, and the lowest RMSE 9025.336, indicating it had the smallest errors of all the forecast. There were no trend in the residuals, and when plotting the forecast vs the residuals, the errors were all concentrated around 0. The histogram of the residuals showed the vast majority of the errors to be within in -10,000 and 10,000 which is small for this data set. These accuracy measures all contributed to choosing ARIMA as the best model for predicting the total flights in the US over the next year.
#Insights
#Airline companies should have more planes and pilots on staff for the summer months, as there will be an increased amount of flights being booked during this time. Airports can also prepare for this by having more air traffic control workers during the summer, as well as more restaurants open for the larger crowds. In the winter, airline companies can limit the amount of planes being flown to limit mileage and fuel costs.